In [1]:
from glob import glob
import raw_taq
import pandas as pd
In [2]:
import numpy as np
from statistics import mode
def print_stats(chunk):
    """Print summary statistics for the ``Bid_Price`` column of ``chunk``.

    Parameters
    ----------
    chunk
        Any object for which ``chunk['Bid_Price']`` returns a sized sequence
        of prices (e.g. a numpy record array, a DataFrame or a dict of lists).

    Returns
    -------
    None
        The maximum, minimum, mean, mode and standard deviation (as computed
        by ``np.std``) are printed, not returned.

    Notes
    -----
    When ``statistics.mode`` cannot determine a mode it raises
    ``StatisticsError``; the mode is then reported as NaN. If there are no
    prices at all, every statistic is reported as NaN.
    """
    # Imported locally: only `mode` is imported from `statistics` at the top
    # of the notebook, so a bare `StatisticsError` in the except clause would
    # itself fail with NameError instead of falling back to NaN.
    from statistics import StatisticsError

    # Look the column up once instead of once per statistic.
    prices = chunk['Bid_Price']

    if len(prices) == 0:
        # Nothing to summarise: max()/min() would raise on an empty sequence,
        # so report every statistic as undefined instead.
        max_price = min_price = avg_price = mod_price = sd_price = np.nan
    else:
        # Find the max bid price
        max_price = max(prices)
        # Find the min bid price
        min_price = min(prices)
        # Find the mean of the bid price
        avg_price = np.mean(prices)
        # Find the mode of the bid price
        try:
            mod_price = mode(prices)
        except StatisticsError:
            mod_price = np.nan
        # Find the standard deviation of the bid price
        sd_price = np.std(prices)

    print("Max bid price: ", max_price, "\n", "Min bid price: ", min_price, "\n",
          "Mean bid price: ", avg_price, "\n", "Mod bid price: ", mod_price, "\n",
          "Standard deviation bid price: ", sd_price)
In [7]:
# Re-import raw_taq so that edits made to raw_taq.py take effect in this
# kernel without restarting it. Only needed after changing that file.
from importlib import reload
reload(raw_taq)
Out[7]:
In [5]:
# Pick the first BBO archive matching the pattern. glob() returns matches in
# arbitrary (filesystem) order, so sort them to make "first" reproducible,
# and fail with a clear message when nothing matches instead of an IndexError.
taq_paths = sorted(glob('../local_data/EQY_US_ALL_BBO_*.zip'))
if not taq_paths:
    raise FileNotFoundError(
        "No file matches '../local_data/EQY_US_ALL_BBO_*.zip'")
fname = taq_paths[0]
test_run = raw_taq.TAQ2Chunks(fname)
In [6]:
# Build the chunk iterator for the selected file (consumed with next() below).
# NOTE(review): 20 is presumably the number of records per chunk - confirm in raw_taq.py
chunk_gen = test_run.convert_taq(20)
In [26]:
# Check what kind of object convert_taq returned
type(chunk_gen)
Out[26]:
In [7]:
# Pull the next chunk from chunk_gen with next(), then display its first record
chunk = next(chunk_gen)
chunk[0]
Out[7]:
In [28]:
# To see only the chunk's dtype (no data), look at its .dtype attribute
chunk.dtype
Out[28]:
In [65]:
# Numpy record arrays support string indexing to get columns:
# indexing with a field name returns that field for every record in the chunk
print(chunk['Bid_Price'])
print(chunk["Ask_Price"])
In [66]:
# An integer index selects a single row (one record) instead of a column
chunk[0]
Out[66]:
In [31]:
# The two can be combined: select the column by name, then the row by position
chunk['Bid_Price'][6]
Out[31]:
In [32]:
# Or the other way around: select the row first, then the field by name
chunk[6]['Bid_Price']
Out[32]:
In [13]:
# Wrap the chunk in a pandas DataFrame
chunk_df = pd.DataFrame(chunk)
In [14]:
# Display the DataFrame built from the chunk
chunk_df
Out[14]:
In [15]:
# Note that the Time column is not correctly parsed yet:
chunk_df.Time
Out[15]:
In [10]:
# Look at the chunk's dtype again for reference
chunk.dtype
Out[10]:
In [8]:
# Stream the TAQ file chunk by chunk, group consecutive records by
# Symbol_root, and print bid-price statistics for each symbol. Stops once
# more than three symbols have been reported.

def _report_symbol(symbol, frames):
    """Concatenate the frames collected for one symbol and print its stats."""
    accum = pd.concat(frames, ignore_index=True)
    # Report the number of records, not the length of the symbol itself.
    print('Current symbol:', symbol, len(accum), 'records')
    print_stats(accum)


# glob() returns matches in arbitrary order; sort so the chosen file is
# reproducible, and fail clearly when nothing matches.
taq_paths = sorted(glob('../local_data/EQY_US_ALL_BBO_*.zip'))
if not taq_paths:
    raise FileNotFoundError(
        "No file matches '../local_data/EQY_US_ALL_BBO_*.zip'")
fname = taq_paths[0]
local_taq = raw_taq.TAQ2Chunks(fname)
chunk_gen = local_taq.convert_taq(20)

curr_symbol = None      # symbol currently being accumulated
pieces = []             # DataFrames collected so far for curr_symbol
processed_symbols = 0
done = False

for chunk in chunk_gen:
    # A single chunk may hold records of several symbols, so keep splitting
    # it until every record has been assigned to a symbol.
    remaining = chunk
    while len(remaining) > 0:
        if curr_symbol is None:
            curr_symbol = remaining['Symbol_root'][0]
        where_symbol = curr_symbol == remaining['Symbol_root']
        if where_symbol.all():
            # Everything left belongs to the current symbol; the symbol may
            # continue in the next chunk, so do not report yet.
            pieces.append(pd.DataFrame(remaining))
            break
        # The current symbol ends inside this chunk: collect its last
        # records (in a list - DataFrame.append never modified the frame in
        # place and no longer exists in pandas 2), then report it.
        pieces.append(pd.DataFrame(remaining[where_symbol]))
        _report_symbol(curr_symbol, pieces)
        processed_symbols += 1
        if processed_symbols > 3:
            done = True
            break
        # Start accumulating the next symbol from the rest of the chunk.
        remaining = remaining[~where_symbol]
        pieces = []
        curr_symbol = remaining['Symbol_root'][0]
    if done:
        break

# If the data ran out before the limit was reached, the last symbol has not
# been reported yet.
if not done and pieces:
    _report_symbol(curr_symbol, pieces)
    processed_symbols += 1
In [9]:
# Bytes literals compare equal when their contents match, trailing space included
b'AA ' == b'AA '
Out[9]:
In [16]:
def simple_fun(l):
    """Generator that yields the elements of ``l`` one by one, in order."""
    for element in l:
        yield element
In [17]:
# Calling a generator function only creates the generator object;
# no items are produced until it is iterated
simple_gen = simple_fun(['a', 'b', 1, 2])
In [18]:
# Check what kind of object simple_fun returned
type(simple_gen)
Out[18]:
In [19]:
# next() advances the generator by one step and returns the item it yields
next(simple_gen)
Out[19]:
In [20]:
# A for loop calls next() on the generator behind the scenes and stops
# automatically once it is exhausted
for item in simple_fun(['a', 'b', 1, 2]):
    print(item)
In [ ]: